JsonToTable.php
<?php
class JSONToTable {
protected $sourceFile;
protected $outDir;
protected $pdo;
protected $tempSqliteFile;
protected $infoGenFile;
protected $schemaGenFile;
protected $schemaStaticFile;
protected $sqlGenFile;
public function __construct($sourceFile, $outputDir, $pdo=null){
if (!is_dir($outputDir)){
throw new \Exception("Directory doesn't exist.");
}
$this->sourceFile = $sourceFile;
$this->outDir = $outputDir;
$tempSqliteFile = $outputDir.'/placeholder-sqlite-file-'.uniqid().'.sql3';
$this->tempSqliteFile = $tempSqliteFile;
$this->pdo = $pdo;
$this->infoGenFile = $this->outDir.'/schema-info-gen.json';
$this->schemaGenFile = $this->outDir.'/schema-gen.json';
$this->schemaStaticFile = $this->outDir.'/schema.json';
if (!is_file($this->schemaStaticFile)){
$this->schemaStaticFile = dirname($this->outDir).'/schema.json';
}
$this->sqlGenFile = $this->outDir.'/sql-gen';
}
protected function lineToRow($line){
if (trim($line)=='[') return false;
if (trim($line)==']')return false;
$line = substr($line,0,strrpos($line,'}')+1);
$row = json_decode($line, true);
return $row;
}
public function generateSchema($generateIfStaticPresent=true){
if (!$generateIfStaticPresent
&&(file_exists($this->schemaStaticFile)
||file_exists($this->schemaGenFile))){
return;
}
$schemaInfo = $this->getSchemaInfo();
$info = json_encode($schemaInfo, JSON_PRETTY_PRINT);
file_put_contents($this->infoGenFile, $info);
$schema = [];
foreach ($schemaInfo as $col => $data){
$maxLen = $data['maxlen'];
if ($maxLen > 1000){
$type = "TEXT";
} else if ($data['is_date']>0){
$type = 'DATE';
} else if ($data['is_string']>0
&&$data['is_string']>$data['is_numeric']
|| $data['is_array'] > 0
&& $data['is_array'] > $data['is_numeric']
) {
$len = $maxLen + 50;
$type = "VARCHAR({$len})"; //or varchar
} else if ($data['is_float']>0
&& $data['is_float'] > $data['is_int']
){
$type = "FLOAT";
} else if ($data['is_bool']>0){
$type = "BOOL";
} else {
$len = $maxLen;
if ($len < 10)$len = 10;
$type = "int({$len})";
}
$schema[$col] = [
'name'=> $col,
'type'=> $type,
];
}
$schemaJson = json_encode($schema, JSON_PRETTY_PRINT);
file_put_contents($this->schemaGenFile, $schemaJson);
}
protected function getSchemaInfo(){
$schemaData = [];
$sampleData = [
'maxlen'=> 0,
'minlen'=> false,
'count' => 0,
'is_string' => 0,
'is_numeric' => 0,
'is_int' => 0,
'is_float'=> 0,
'is_bool' => 0,
'is_array'=> 0,
'is_date' => 0,
'is_json' => 0,
'sampleValue' => '',
];
//loop over every row
//modify keys of schemaDataArray
// finally json_encode & output the schema data to an info file <- For the developer to inspect their input data
// json_encode & output an actual schema-file that details how to create the table.
$reader = fopen($this->sourceFile, 'r');
while ($line = fgets($reader)) {
$row = $this->lineToRow($line);
if ($row==false)continue;
foreach ($row as $column => $value){
$data = $schemaData[$column] ?? $sampleData;
$data['count']++;
$valueForLen = is_array($value) ? json_encode($value) : $value;
$len = strlen($valueForLen);
if ($len===false || $data['maxlen'] > $len){
$data['maxlen'] = $data['maxlen'];
} else {
$data['maxlen'] = $len;
$data['sampleValue'] = substr($valueForLen, 0, 1000);
}
$data['minlen'] = ($len == false || $data['minlen'] < $len) && $data['minlen'] !== false ? $data['minlen'] : $len;
$data['is_string'] += is_string($value) ? 1 : 0;
$data['is_bool'] += is_bool($value) ? 1 : 0;
$data['is_array'] += is_array($value) ? 1 : 0;
$data['is_date'] += $this->is_date($value) ? 1 : 0;
if (is_numeric($value)){
$data['is_numeric'] += 1;
$data['is_int'] += is_int($value) || (int)$value==$value ? 1 : 0;
$data['is_float'] += is_float($value) || (float)$value==$value ? 1 : 0;
}
if (is_string($value)){
$json = json_decode($value, true);
$data['is_json'] += ($json===null) ? 0 : 1;
}
$schemaData[$column] = $data;
}
}
fclose($reader);
return $schemaData;
}
public function generateSql($tableName,$dropIfExists = true, $chunkByLength=false){
$schemaFile = file_exists($this->schemaStaticFile) ? $this->schemaStaticFile
: (file_exists($this->schemaGenFile) ? $this->schemaGenFile : null);
if ($schemaFile==null){
$this->generateSchema();
$schemaFile = $this->schemaGenFile;
}
$schemaJson = json_decode(file_get_contents($schemaFile), true);
if ($schemaJson==null)throw new \Exception ("Schema File '{$schemaFile}' does NOT have valid json");
$this->generateSqlCreate($tableName, $schemaJson, $dropIfExists);
$this->generateSqlInsert($tableName, $schemaJson, $chunkByLength);
}
public function generateSqlCreate($tableName, $schemaJson, $dropIfExists){
$colStatements = [];
foreach ($schemaJson as $col => $info){
$statement = '`'.$info['name'].'` '.$info['type'];
$colStatements[] = $statement;
}
$colsSql = implode(", ", $colStatements);
$drop = $dropIfExists ? "DROP TABLE IF EXISTS `{$tableName}`;\n" : '';
$sql =
<<<SQL
{$drop}
CREATE TABLE `{$tableName}`
(
{$colsSql}
)
;
SQL;
file_put_contents($this->sqlGenFile.'-create.sql',$sql);
}
public function generateSqlInsert($tableName, $schemaJson, $chunkByLength){
$pdo = $this->pdo;
if ($pdo==null)$pdo = new PDO('sqlite:'.$this->tempSqliteFile);
$reader = fopen($this->sourceFile, 'r');
$cols = array_combine(array_keys($schemaJson), array_keys($schemaJson));
$colsSql = '`'.implode('`,`', $cols).'`';
$insert =
<<<SQL
INSERT INTO `{$tableName}`
( ${colsSql} )
VALUES
SQL;
//goto here
$rowCount = 0;
$fileIndex = 0;
$writer = null;
$rows = [];
while ($line = fgets($reader)) {
$row = $this->lineToRow($line);
if ($row==false)continue;
$rowCount++;
$row = array_filter($row,
function($value, $key) use ($cols){
return isset($cols[$key]);
}, ARRAY_FILTER_USE_BOTH
);
$row = array_map(
function($value) use ($pdo){
if (is_array($value)){
$value = json_encode($value);
}
if (is_string($value))$value = $pdo->quote($value);
else if (strlen($value)==0)$value = 'NULL';
// $value = '"'.substr($value,1,-1).'"';
// return "{$value}";// Why am I returning it like that?
return $value;
}, $row
);
$data = [];
foreach ($cols as $name){
$data[] = $row[$name] ?? '0';
}
// $leadComma = $rowCount===1 ? '' : ',';
$rowSql = "\n".'( '. implode(', ',$data).')';
// echo "\n$rowCount";
if ($writer === null || $chunkByLength!==false && $len + strlen($rowSql) > $chunkByLength){
// finish what we have. This line gets written to next file
// file name increments by 1
if ($writer!==null){
fwrite($writer, "\n;\n");
fclose($writer);
}
$writer = fopen($this->sqlGenFile."-insert-{$fileIndex}.sql", 'w');
fwrite($writer, $insert);
$fileIndex++;
$rowCount = 1;
$len = strlen($insert);
}
if ($rowCount!==1){
$rowSql = ",".$rowSql;
}
fwrite($writer,$rowSql);
$len += strlen($rowSql);
}
fwrite($writer, "\n;\n");
fclose($writer);
fclose($reader);
unlink($this->tempSqliteFile);
}
public function is_date($value){
if (!is_string($value))return false;
return preg_match('/[0-9]{4}\-[0-9]{2}\-[0-9]{2}/', $value);
}
}